library(ggplot2)
library(ggpubr)
library(ggVennDiagram)
library(VennDiagram)
library(scales)
library(stringr)

# Scratch working directory for this session; relative paths resolve against
# it. NOTE(review): machine-specific path - setwd() stops with an error when
# the directory does not exist.
setwd("D:/software/R/Rtemp")

# Project root. The leafcutter inputs, the figure and the GO tables below are
# all addressed as absolute paths built from this value.
workpath <- "C:/Users/ab998/OneDrive - University of Exeter/TDP43 project JG/AIO/project11219"

#########################

#data loading and filtering

#########################

#"c" stands for cluster (significance). These tables give an adjusted p-value (p.adjust) for each cluster (i.e. junction): was there a significant difference in splicing events at this junction between the control and test samples? The clusters have also been mapped to genes.
#For our purposes, we only need the cluster, p.adjust and genes columns.

# Load the two leafcutter result tables of the d40 comparison (tab-separated,
# first line is the header):
#   tdp.c  - cluster significance; its cluster, genes and p.adjust columns
#            are used further down.
#   tdp.ef - effect sizes; its intron and deltapsi columns are used further
#            down.
# stringsAsFactors = FALSE keeps the text columns as character vectors on
# R < 4.0 too, where read.table() would otherwise return factors and the
# later strsplit() on the intron column would fail.
tdp.c <- read.table(
  paste0(workpath, "/leafcutter/d40/tdp43_leafcutter_cluster_significance.txt"),
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
)
tdp.ef <- read.table(
  paste0(workpath, "/leafcutter/d40/tdp43_leafcutter_effect_sizes.txt"),
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
)

####
# Annotate every intron of the effect size table with the gene(s) and the
# adjusted p-value of the cluster it belongs to.
#
# Args:
#   df.c:  cluster significance table; must provide the columns `cluster`,
#          `genes` and `p.adjust`.
#   df.ef: effect size table; must provide the column `intron`, a
#          ":"-separated string whose 1st and 4th fields, joined by ":",
#          form the key that is looked up in df.c$cluster.
#
# Returns:
#   df.ef with two added columns, `genes` and `p.adjust`. Introns whose key
#   is not present in df.c$cluster get NA in both columns.
merge_df_c_ef <- function(df.c, df.ef) {
  # Split the whole column at once: as.character() tolerates a factor column
  # and a zero-row table simply yields an empty key vector (the former
  # 1:nrow index loop iterated over c(1, 0) in that case).
  fields <- strsplit(as.character(df.ef$intron), ":", fixed = TRUE)
  clusters <- vapply(
    fields,
    function(f) paste(f[1], f[4], sep = ":"),
    character(1)
  )

  # Look every key up once and reuse the row index for both columns.
  hit <- match(clusters, df.c$cluster)
  df.ef$genes <- df.c$genes[hit]
  df.ef$p.adjust <- df.c$p.adjust[hit]
  df.ef
}

# Attach gene names and adjusted p-values to the effect size table. The
# significance table is removed afterwards; nothing in the visible part of
# the script reads it again.
tdp.ef <- merge_df_c_ef(df.c = tdp.c, df.ef = tdp.ef)
rm(list = "tdp.c")


# Expand an intron-level table to one row per (intron, gene) pair.
#
# Args:
#   df: data frame with the columns `intron`, `genes` (comma-separated gene
#       names), `p.adjust` and `deltapsi`.
#
# Returns:
#   A data frame with the columns `intron`, `genes` (a single gene per row),
#   `padj` and `deltapsi`. An intron annotated with k genes contributes k
#   rows that share the intron's padj and deltapsi. An intron whose gene
#   field is NA or empty contributes one row with genes = NA.
#
# The previous implementation glued intron and gene into one string and split
# it again, which turned a missing gene into the literal text "NA" (invisible
# to na.omit() downstream), shifted all following rows when a gene field was
# empty, and failed on a zero-row table. Expanding by row index avoids the
# round trip altogether.
extract_genes <- function(df) {
  gene_sets <- strsplit(as.character(df$genes), ",", fixed = TRUE)
  # strsplit() returns character(0) for an empty string; keep such an intron
  # as a single row with a missing gene instead of silently dropping it.
  gene_sets[lengths(gene_sets) == 0L] <- list(NA_character_)

  # src_row[j] is the row of `df` that output row j was expanded from.
  src_row <- rep(seq_along(gene_sets), times = lengths(gene_sets))

  data.frame(
    intron = as.character(df$intron)[src_row],
    # as.character() turns the NULL that unlist() gives for an empty list
    # into character(0), so the column survives for zero-row input.
    genes = as.character(unlist(gene_sets, use.names = FALSE)),
    padj = df$p.adjust[src_row],
    deltapsi = df$deltapsi[src_row],
    stringsAsFactors = FALSE
  )
}


# One row per (intron, gene) pair.
tdp.df <- extract_genes(tdp.ef)

# Permissive cut-offs for this step: keep every gene that has at least one
# intron with a non-zero deltapsi and an adjusted p-value strictly below 1.
pvalthresh <- 1
psithresh <- 0

gene.sig.tdp <- na.omit(unique(tdp.df$genes[which(abs(tdp.df$deltapsi) > psithresh & tdp.df$padj < pvalthresh)]))

# For every retained gene take the row of its intron with the largest
# |deltapsi|. The search runs over all rows of the gene, not only those that
# passed the filter above; which.max() returns the first maximum and ignores
# NA. seq_along() + vapply() give an empty integer vector when no gene was
# retained, whereas 1:length() would have iterated over c(1, 0).
gene.ind <- vapply(seq_along(gene.sig.tdp), function(i) {
  rowind <- which(tdp.df$genes == gene.sig.tdp[i])
  rowind[which.max(abs(tdp.df$deltapsi[rowind]))]
}, integer(1))

# One representative intron per gene; this is the table the volcano plot
# below is drawn from.
df <- tdp.df[gene.ind, ]

#################################

library(ggrepel)

# Genes of interest; their rows of df are collected in df.subset at the end
# of this section.
goi <- c(
  "STMN2", "UNC13A", "ELAVL3", "PFKP", "IGSF21", "ACTL6B",
  "CYFIP2", "FEZ1", "CELF5", "CACNA1E", "KCNQ2", "TRAPPC12"
)

# Cut-offs for calling a splicing change in the volcano plot.
pvalthresh <- 0.01
psithresh <- 0.1

# Direction of change per row. Every row starts as "NC"; rows with padj below
# pvalthresh are relabelled "Inclusion" (deltapsi above psithresh) or
# "Exclusion" (deltapsi below -psithresh). which() skips rows where the
# comparison is NA, so those remain "NC".
df$de <- "NC"
df$de[which(df$padj < pvalthresh & df$deltapsi > psithresh)] <- "Inclusion"
df$de[which(df$padj < pvalthresh & df$deltapsi < -psithresh)] <- "Exclusion"

# y coordinate of the volcano plot.
df$p <- -log10(df$padj)

# Point colour per category.
mycolors <- c(Exclusion = "blue", Inclusion = "red", NC = "gray")

# Rows of the genes of interest, in the order of goi (first match per gene).
# NOTE: match() returns NA for a gene that is absent from df, which shows up
# here as a row consisting of NA values.
df.subset <- df[match(goi, df$genes), ]

# Volcano plot of df: deltapsi on x, -log10(padj) (column p) on y, points
# coloured and sized by the de category.
# - The guide lines are derived from pvalthresh / psithresh so they cannot
#   drift away from the cut-offs used to assign the categories (with the
#   current values they sit at y = 2 and x = -0.1 / 0.1, as before).
# - The size values are named: unnamed values are handed out in level order,
#   so a missing category would shift the sizes of the remaining ones.
# NOTE(review): with these y limits, rows whose -log10(padj) exceeds 60 are
# not drawn - confirm that this is intended.
p <- ggplot(data = df) +
  geom_point(aes(x = deltapsi, y = p, color = de, size = de)) +
  geom_hline(yintercept = -log10(pvalthresh), col = "magenta") +
  geom_vline(xintercept = c(-psithresh, psithresh), col = "coral2") +
  scale_colour_manual(values = mycolors) +
  scale_size_manual(values = c(Exclusion = 2, Inclusion = 2, NC = 1)) +
  scale_x_continuous(limits = c(-1, 1), expand = c(0, 0)) +
  scale_y_continuous(limits = c(0, 60))

# Styling: black-and-white theme, legend on the right without a title, bold
# axis tick labels, square panel.
# NOTE(review): legend.text has size = 1, which renders the legend labels at
# a nearly invisible size - confirm this is deliberate.
# NOTE(review): the y axis is labelled "P-value" although it shows
# -log10(padj); left unchanged so the published figure text stays the same.
p <- p +
  theme_bw() +
  theme(
    legend.title = element_blank(),
    legend.text = element_text(colour = "black", size = 1),
    legend.position = "right",
    axis.text.x = element_text(face = "bold", color = "black", size = 16, angle = 0),
    axis.text.y = element_text(face = "bold", color = "black", size = 16, angle = 0),
    axis.title = element_text(size = 20),
    aspect.ratio = 1
  ) +
  # Legend keys are drawn larger than the plotted points.
  guides(colour = guide_legend(override.aes = list(size = 4))) +
  labs(x = "Delta Psi", y = "P-value")


#q = p +  geom_text_repel(data = df.subset, 
#                         aes(x=deltapsi, y=p, label=genes), size = 4,
#                         nudge_y=-0.6, hjust=1.5, direction="x",
#                         segment.color="green", segment.size=1,
#                         min.segment.length = 0.05)

# Show the plot on the current device. print() is explicit because a bare
# `p` is only auto-printed at top level; when the script is run through
# source() nothing would be drawn.
print(p)

# Write the same plot to a 20 x 20 cm, 300 dpi TIFF. The `units` argument is
# spelled out in full (it was previously reached through partial matching of
# `unit`), and the plot is printed explicitly so the file is never left empty.
tiff(
  paste0(workpath, "/figures/AIO-TDP43-model-d40-Splicing-Volcano.tiff"),
  res = 300, compression = "jpeg", width = 20, height = 20, units = "cm"
)
#par(mar=c(1,1,1,1))
print(p)
dev.off()

######  GO of the spliced genes

library(DOSE)
library(clusterProfiler)
library(pathview)
# enrichGO() below is handed the org.Hs.eg.db annotation object, so the
# package has to be attached; nothing in the visible script did that.
library(org.Hs.eg.db)

# Cut-offs that define the foreground gene set.
pvalthresh <- 0.01
psithresh <- 0.1

# Foreground: genes with at least one intron passing both cut-offs.
# Background (universe): every gene that appears in tdp.df.
goi <- na.omit(unique(tdp.df$genes[which(abs(tdp.df$deltapsi) > psithresh & tdp.df$padj < pvalthresh)]))
allgenes <- na.omit(unique(tdp.df$genes))

# Run the enrichment once per GO ontology and write one tab-separated table
# per ontology to <workpath>/analysis/.
for (ont in c("BP", "CC", "MF")) {

  print(ont)

  ego <- enrichGO(gene = goi,
                  universe = allgenes,
                  keyType = "SYMBOL",
                  OrgDb = org.Hs.eg.db,
                  ont = ont,
                  pAdjustMethod = "BH",
                  pvalueCutoff = 0.5,
                  qvalueCutoff = 0.5,
                  readable = TRUE)

  # Keep terms supported by at least 10 foreground genes.
  cluster_summary <- data.frame(ego)
  cluster_summary <- cluster_summary[which(cluster_summary$Count >= 10), ]
  # NOTE(review): this BH adjustment is computed only over the terms that
  # survived the cut-offs above, so `padj` differs from the `p.adjust`
  # column reported by enrichGO() - confirm that this is intended.
  cluster_summary$padj <- p.adjust(cluster_summary$pvalue, method = "BH")

  filename <- paste0(workpath, "/analysis/AIO-TDP43-d40", ".", ont, ".GO.txt")

  write.table(cluster_summary, filename, sep = "\t",
              row.names = FALSE, col.names = TRUE, quote = FALSE)

}







































